
import requests
import pymysql
import json


# Module-level MySQL connection and cursor, shared by savequestion() and
# saveanswer() below. The connection is opened as soon as the module loads.
# NOTE(review): the credentials are hardcoded in source; consider reading
# them from configuration or the environment instead.
db = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    passwd='lin199@nenu',
    db='zhihu',
    charset='utf8',
)
cursor = db.cursor()
def fetchHotel(url, timeout=10):
    """Download *url* with browser-like headers and return the body as text.

    The response body is also printed to stdout before it is returned.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status code.
    """
    headers = {
        'authority': 'www.zhihu.com',
        'method': 'GET',
        'path': '/',
        'Cache-Control': 'no-cache',
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    }
    req = requests.get(url, headers=headers, timeout=timeout)
    text = req.text
    # Print before the status check so the body of an error response is still
    # visible for debugging.
    print(text)
    # Fail here with a clear HTTP error instead of handing an error payload to
    # the JSON parsing step.
    req.raise_for_status()
    return text

def parseJson(text):
    """Parse one page of the answers API response and store its contents.

    Decodes *text* as JSON, reads the answer list from ``data`` and the
    next-page URL from ``paging.next``. For a non-empty page the question
    (taken from the first answer) is stored with ``savequestion`` and every
    answer with ``saveanswer``; each author name is printed along the way.

    Args:
        text: JSON document returned by the answers endpoint.

    Returns:
        The URL of the next page, or ``""`` when the page has no answers so
        that the caller's ``while url`` loop terminates.

    Raises:
        json.JSONDecodeError: If *text* is not valid JSON.
        KeyError: If an expected key is missing from the document.
    """
    json_data = json.loads(text)
    answers = json_data['data']
    next_page = json_data['paging']['next']

    # Check for an empty page BEFORE indexing answers[0]; indexing first
    # raises IndexError on the final (empty) page instead of ending the crawl.
    if not answers:
        return ""

    first_question = answers[0]['question']
    title = first_question['title']
    qid = first_question['id']
    savequestion(qid, title, "https://www.zhihu.com/question/" + str(qid))

    for item in answers:
        answer = item['content']
        author = item['author']['name']
        print(author)
        saveanswer(item['question']['id'], author, answer)

    return next_page
def savequestion(id, title, url):
    """Insert one row into the ``questions`` table and commit.

    The statement is parameterized so that quotes or other SQL
    metacharacters in the scraped title cannot break or alter the query.
    On any failure the transaction is rolled back and the error is printed;
    the exception is not propagated (best-effort insert).

    Args:
        id: Question identifier; stored as its string form.
        title: Question title.
        url: Public URL of the question page.
    """
    sql = "insert into questions (`id`,`title`,url) values (%s,%s,%s)"
    try:
        cursor.execute(sql, (str(id), title, url))
        db.commit()
    except Exception as e:
        # Deliberately broad: a failed insert is reported and skipped rather
        # than stopping the crawl.
        db.rollback()
        print(e)
def saveanswer(id, author, content):
    """Insert one row into the ``answer`` table and commit.

    The statement is parameterized so that quotes or other SQL
    metacharacters in the scraped author name or answer body cannot break or
    alter the query. On any failure the transaction is rolled back, the
    error is printed and the program terminates.

    Args:
        id: Identifier of the question the answer belongs to; stored as its
            string form.
        author: Display name of the answer's author.
        content: Answer body as returned by the API.

    Raises:
        SystemExit: With exit status 1 if the insert or commit fails.
    """
    sql = "insert into answer (`question`,`author`,`content`) values (%s,%s,%s)"
    try:
        cursor.execute(sql, (str(id), author, content))
        db.commit()
    except Exception as e:
        db.rollback()
        print(e)
        # Raise SystemExit directly instead of calling the interactive
        # ``exit()`` helper (only present when the ``site`` module is loaded),
        # and use a non-zero status so the failure is visible to the caller.
        raise SystemExit(1) from e
 


if __name__ == '__main__':
    # Question whose answers are crawled.
    questionid = "391872560"
    # Example of a complete request URL (question 460763946, same "include"
    # field list as built below):
    # https://www.zhihu.com/api/v4/questions/460763946/answers?include=...&limit=15&offset=0&platform=desktop&sort_by=default
    url = (
        "https://www.zhihu.com/api/v4/questions/" + str(questionid)
        + "/answers?include=data[*].is_normal,admin_closed_comment,reward_info,"
        "is_collapsed,annotation_action,annotation_detail,collapse_reason,"
        "is_sticky,collapsed_by,suggest_edit,comment_count,can_comment,content,"
        "editable_content,attachment,voteup_count,reshipment_settings,"
        "comment_permission,created_time,updated_time,review_info,relevant_info,"
        "question,excerpt,is_labeled,paid_info,paid_info_content,"
        "relationship.is_authorized,is_author,voting,is_thanked,is_nothelp,"
        "is_recognized;data[*].mark_infos[*].url;"
        "data[*].author.follower_count,vip_info,badge[*].topics;"
        "data[*].settings.table_of_content.enabled"
        "&limit=15&offset=0&platform=desktop&sort_by=default"
    )
    try:
        # Follow the pagination chain: parseJson() hands back the next page
        # URL, and the loop stops once that value is empty/falsy.
        while url:
            text = fetchHotel(url)
            url = parseJson(text)
    finally:
        # Release the database resources even if the crawl aborts early.
        cursor.close()
        db.close()

